Poniżej podjęto próbę analizy danych z WDI oraz predykcji ceny złota na jej podstawię. Z analizy wynika, że wiele z najważniejszych atrybutów które znalazł regresor jest nietypowa np. Czas potrzebny do wybudowania magazynu.
library("knitr")
library("DT")
library("readxl") # Reading excel files
library("tidyr")
library("dplyr")
library("zoo") # NA interpolation
library("corrr")
library("ggplot2")
library("plotly")
library("ggcorrplot")
library("caret")
set.seed(23)
WDI <- read_excel("Data_pack/World_Development_Indicators.xlsx", sheet = 1, na="..")
currencyEx <- read.table("Data_pack/CurrencyExchangeRates.csv", header=TRUE, sep = ",")
WDI <- WDI %>% slice_head(n = 44304) %>%
select(-c("Country Code", "Series Name")) %>% # 2
pivot_longer(3:53, names_to="Year") %>% # 3
pivot_wider(names_from = `Series Code`, values_from = value) %>% # 4
mutate(Year = substr(Year, 1, 4))
# 6. Interpolating and extrapolating all possible values
WDI <- WDI %>%
group_by(`Country Name`) %>%
mutate_at(3:215, na.approx, na.rm=FALSE, rule = 2) %>%
ungroup()
WDI <- WDI %>% arrange(`Country Name`, Year)
# 7. Removing measures/countries with most % of Na's until we are left with no Na's
while(TRUE){
summirize_NAs <- WDI %>%
select(-2) %>%
group_by(`Country Name`) %>%
summarise_all(funs(sum(is.na(.)))) %>%
ungroup();
row_NAs <- summirize_NAs %>%
mutate(sum = rowSums(across(where(is.numeric)))/(ncol(summirize_NAs)-1)/51);
col_NAs <- row_NAs %>%
summarise(across(-1, ~ sum(., is.na(.), 0)/nrow(row_NAs)/51));
max_row <- max(row_NAs %>% select(sum));
max_row_index <- which(row_NAs %>% select(sum)==max_row);
max_col <- max(col_NAs);
max_col_index <- which(col_NAs==max_col);
if(max_col==max_row && max_col==0) break;
if(max_col > max_row) { WDI <- WDI %>% select(-(max_col_index+2));
} else WDI <- WDI %>% slice(-((51*(max_row_index-1)+1):(51*max_row_index)));
}
Po uzyskaniu zbioru bez wartości pustych pozostało 122 z początkowych 213 serii (57.3%) oraz 141 z 208 początkowych krajów (67.8%) z danymi ekstrapolowanymi do wszystkich 51 lat.
dim(WDI)
## [1] 7191 124
kable(summary.data.frame(WDI))
| Country Name | Year | SP.URB.GROW | SP.URB.TOTL.IN.ZS | SP.URB.TOTL | TX.VAL.TRAN.ZS.WT | TM.VAL.TRAN.ZS.WT | BG.GSR.NFSV.GD.ZS | NE.TRD.GNFS.ZS | NY.GDP.TOTL.RT.ZS | EN.ATM.GHGT.KT.CE | SH.ALC.PCAP.LI | IC.WRH.DURS | IC.LGL.DURS | IC.ELC.TIME | IC.TAX.PAYM | SP.DYN.TO65.FE.ZS | SP.DYN.TO65.MA.ZS | SH.STA.SUIC.P5 | SH.STA.SUIC.FE.P5 | SH.STA.SUIC.MA.P5 | IC.LGL.CRED.XQ | BM.GSR.NFSV.CD | BX.GSR.NFSV.CD | SL.EMP.SELF.MA.ZS | SL.EMP.SELF.ZS | SL.EMP.SELF.FE.ZS | IT.NET.SECR | IT.NET.SECR.P6 | SE.SEC.TCHR | SE.SEC.ENRL | IP.JRN.ARTC.SC | SP.RUR.TOTL.ZG | SP.RUR.TOTL.ZS | SP.RUR.TOTL | EG.FEC.RNEW.ZS | ER.H2O.INTR.PC | ER.H2O.INTR.K3 | EG.ELC.RNEW.ZS | SE.PRM.ENRL.TC.ZS | SG.GEN.PARL.ZS | BM.GSR.FCTY.CD | BX.GSR.FCTY.CD | SE.PRM.AGES | BN.KLT.PTXL.CD | BX.PEF.TOTL.CD.WD | SP.POP.TOTL | SP.POP.TOTL.MA.IN | SP.POP.TOTL.MA.ZS | SP.POP.TOTL.FE.ZS | SP.POP.TOTL.FE.IN | SP.POP.GROW | EN.POP.DNST | SP.POP.65UP.TO.ZS | SP.POP.1564.TO.ZS | SP.POP.0014.TO.ZS | EN.ATM.PM25.MC.M3 | EN.ATM.PM25.MC.ZS | EN.ATM.PM25.MC.T1.ZS | EN.ATM.PM25.MC.T2.ZS | EN.ATM.PM25.MC.T3.ZS | SH.DTH.MORT | EN.ATM.NOXE.ZG | EN.ATM.NOXE.KT.CE | EN.ATM.NOXE.EG.ZS | NY.GSR.NFCY.CD | NY.GSR.NFCY.CN | BN.GSR.FCTY.CD | FM.AST.DOMS.CN | NY.GDP.NGAS.RT.ZS | SP.DYN.IMRT.IN | SH.STA.TRAF.P5 | EN.ATM.METH.ZG | EN.ATM.METH.KT.CE | EN.ATM.METH.EG.KT.CE | TX.VAL.MRCH.HI.ZS | SP.DYN.LE00.IN | AG.LND.TOTL.K2 | SL.TLF.TOTL.IN | ST.INT.XPND.CD | SM.POP.TOTL.ZS | IT.NET.USER.ZS | NE.IMP.GNFS.CD | NE.IMP.GNFS.ZS | NY.GNS.ICTR.ZS | NE.DAB.TOTL.ZS | NE.DAB.TOTL.CD | NY.GNS.ICTR.CD | NY.GDS.TOTL.ZS | NY.GDS.TOTL.CD | BX.GSR.MRCH.CD | BM.GSR.MRCH.CD | NY.GDP.PCAP.CD | NY.GDP.PCAP.KD.ZG | NY.GDP.MKTP.KD.ZG | NY.GDP.MKTP.CD | TX.VAL.FUEL.ZS.UN | TM.VAL.FUEL.ZS.UN | TX.VAL.FOOD.ZS.UN | TM.VAL.FOOD.ZS.UN | NE.EXP.GNFS.CD | SL.IND.EMPL.ZS | SL.SRV.EMPL.ZS | SL.AGR.EMPL.ZS | SL.EMP.MPYR.ZS | IC.BUS.DFRN.XQ | SH.STA.DIAB.ZS | SH.XPD.CHEX.PC.CD | SH.XPD.CHEX.GD.ZS | EN.ATM.CO2E.SF.ZS | EN.ATM.CO2E.SF.KT | EN.ATM.CO2E.LF.KT | EN.ATM.CO2E.LF.ZS | EN.ATM.CO2E.GF.KT | EN.ATM.CO2E.GF.ZS | EN.ATM.CO2E.PC | EN.ATM.CO2E.KT | EN.ATM.CO2E.PP.GD | EN.ATM.CO2E.PP.GD.KD | EN.ATM.CO2E.KD.GD | SP.DYN.CBRT.IN | AG.LND.PRCP.MM | FB.ATM.TOTL.P5 | EG.ELC.ACCS.ZS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Length:7191 | Length:7191 | Min. :-187.142 | Min. : 2.845 | Min. : 10522 | Min. :-381.37 | Min. : 0.2916 | Min. : 1.165 | Min. : 0.021 | Min. : 0.0000 | Min. : 21 | Min. : 0.003 | Min. : 27.0 | Min. : 210.0 | Min. : 13.0 | Min. : 3.00 | Min. : 6.464 | Min. : 1.477 | Min. : 0.30 | Min. : 0.000 | Min. : 0.40 | Min. : 0.000 | Min. :1.000e+06 | Min. :0.000e+00 | Min. : 0.39 | Min. : 0.41 | Min. : 0.07 | Min. : 0 | Min. : 0.00 | Min. : 0 | Min. : 0 | Min. : 0.0 | Min. :-235.79245 | Min. : 0.00 | Min. : 0 | Min. : 0.000 | Min. : 0 | Min. : 0.0 | Min. : 0.000 | Min. : 8.141 | Min. : 0.000 | Min. :-2.187e+08 | Min. :-5.061e+07 | Min. :4.000 | Min. :-8.080e+11 | Min. :-2.441e+11 | Min. :8.537e+04 | Min. : 45189 | Min. :44.37 | Min. :23.29 | Min. : 39622 | Min. :-6.7661 | Min. : 0.8232 | Min. : 0.7146 | Min. :46.70 | Min. :12.45 | Min. : 5.861 | Min. : 0.00 | Min. : 0.0000 | Min. : 0.00000 | Min. : 0.00 | Min. : 17 | Min. :-84.585 | Min. : 0 | Min. : 0.000 | Min. :-9.905e+10 | Min. :-4.813e+14 | Min. :-1.052e+11 | Min. :-5.424e+13 | Min. : 0.000000 | Min. : 1.70 | Min. : 0.7 | Min. : -78.979 | Min. : 13.1 | Min. : 0 | Min. : 0.2453 | Min. :18.91 | Min. : 300 | Min. : 57406 | Min. :3.500e+06 | Min. : 0.0326 | Min. : 0.0000 | Min. :0.000e+00 | Min. : 0.00 | Min. :-236.27 | Min. : 21.21 | Min. :5.393e+07 | Min. :-2.601e+10 | Min. :-141.97 | Min. :-7.622e+09 | Min. :5.000e+06 | Min. :5.573e+06 | Min. : 22.8 | Min. :-64.9924 | Min. :-64.047 | Min. :4.061e+07 | Min. : 0.0000 | Min. : 0.00933 | Min. : 0.00058 | Min. : 0.4738 | Min. :6.933e+05 | Min. : 0.28 | Min. : 5.34 | Min. : 0.06 | Min. : 0.000 | Min. :31.95 | Min. : 1.000 | Min. : 4.335 | Min. : 1.453 | Min. : -4.324 | Min. : -114 | Min. : -161.3 | Min. : -6.089 | Min. : 0.0 | Min. : 0.000 | Min. : 0.00433 | Min. : 4 | Min. :0.01479 | Min. :0.01041 | Min. :0.02151 | Min. : 5.90 | Min. : 18.1 | Min. : 0.000 | Min. : 0.5339 | |
| Class :character | Class :character | 1st Qu.: 1.117 | 1st Qu.: 33.870 | 1st Qu.: 1190394 | 1st Qu.: 13.71 | 1st Qu.:27.9935 | 1st Qu.: 8.929 | 1st Qu.: 46.374 | 1st Qu.: 0.5579 | 1st Qu.: 11500 | 1st Qu.: 2.590 | 1st Qu.:142.0 | 1st Qu.: 447.0 | 1st Qu.: 66.0 | 1st Qu.: 12.00 | 1st Qu.:59.478 | 1st Qu.:50.834 | 1st Qu.: 5.00 | 1st Qu.: 2.500 | 1st Qu.: 7.10 | 1st Qu.: 2.000 | 1st Qu.:2.949e+08 | 1st Qu.:1.846e+08 | 1st Qu.:19.82 | 1st Qu.:17.71 | 1st Qu.:14.21 | 1st Qu.: 13 | 1st Qu.: 1.46 | 1st Qu.: 6703 | 1st Qu.: 137241 | 1st Qu.: 28.9 | 1st Qu.: -0.40820 | 1st Qu.:29.69 | 1st Qu.: 1035996 | 1st Qu.: 6.679 | 1st Qu.: 1424 | 1st Qu.: 11.5 | 1st Qu.: 3.175 | 1st Qu.: 17.606 | 1st Qu.: 6.091 | 1st Qu.: 7.935e+07 | 1st Qu.: 1.999e+07 | 1st Qu.:6.000 | 1st Qu.:-5.821e+07 | 1st Qu.: 0.000e+00 | 1st Qu.:2.936e+06 | 1st Qu.: 1457900 | 1st Qu.:48.83 | 1st Qu.:49.82 | 1st Qu.: 1494832 | 1st Qu.: 0.6204 | 1st Qu.: 18.8641 | 1st Qu.: 3.2608 | 1st Qu.:53.38 | 1st Qu.:22.68 | 1st Qu.: 16.469 | 1st Qu.: 99.94 | 1st Qu.: 0.0000 | 1st Qu.: 0.00828 | 1st Qu.: 67.93 | 1st Qu.: 1078 | 1st Qu.:-16.160 | 1st Qu.: 1299 | 1st Qu.: 3.022 | 1st Qu.:-1.106e+09 | 1st Qu.:-1.423e+10 | 1st Qu.:-1.236e+09 | 1st Qu.: 3.969e+09 | 1st Qu.: 0.000000 | 1st Qu.: 12.40 | 1st Qu.:12.4 | 1st Qu.: -6.591 | 1st Qu.: 3250.6 | 1st Qu.: 400 | 1st Qu.: 55.7356 | 1st Qu.:58.32 | 1st Qu.: 48100 | 1st Qu.: 1305780 | 1st Qu.:9.600e+07 | 1st Qu.: 0.9909 | 1st Qu.: 0.0000 | 1st Qu.:1.260e+09 | 1st Qu.: 25.08 | 1st Qu.: 13.54 | 1st Qu.: 97.86 | 1st Qu.:3.733e+09 | 1st Qu.: 5.670e+08 | 1st Qu.: 10.89 | 1st Qu.: 3.471e+08 | 1st Qu.:6.449e+08 | 1st Qu.:9.854e+08 | 1st Qu.: 615.2 | 1st Qu.: -0.6905 | 1st Qu.: 1.033 | 1st Qu.:3.286e+09 | 1st Qu.: 0.5267 | 1st Qu.: 7.32520 | 1st Qu.: 7.09749 | 1st Qu.: 8.8995 | 1st Qu.:9.338e+08 | 1st Qu.:13.42 | 1st Qu.:34.86 | 1st Qu.: 9.48 | 1st Qu.: 1.040 | 1st Qu.:52.87 | 1st Qu.: 4.500 | 1st Qu.: 32.842 | 1st Qu.: 4.188 | 1st Qu.: 0.000 | 1st Qu.: 0 | 1st Qu.: 1694.2 | 1st Qu.: 40.872 | 1st Qu.: 0.0 | 1st Qu.: 0.000 | 1st Qu.: 0.54559 | 1st Qu.: 2251 | 1st Qu.:0.16043 | 1st Qu.:0.12069 | 1st Qu.:0.24574 | 1st Qu.:14.73 | 1st Qu.: 593.0 | 1st Qu.: 3.042 | 1st Qu.: 40.8004 | |
| Mode :character | Mode :character | Median : 2.447 | Median : 52.589 | Median : 3542777 | Median : 24.82 | Median :41.8922 | Median : 13.966 | Median : 66.165 | Median : 2.5902 | Median : 42440 | Median : 5.842 | Median :186.0 | Median : 564.0 | Median : 95.0 | Median : 31.00 | Median :77.064 | Median :63.624 | Median : 8.80 | Median : 4.300 | Median : 12.90 | Median : 5.000 | Median :1.169e+09 | Median :1.049e+09 | Median :38.53 | Median :38.57 | Median :40.40 | Median : 115 | Median : 13.28 | Median : 32950 | Median : 474281 | Median : 288.0 | Median : 0.67063 | Median :47.41 | Median : 3303149 | Median :26.788 | Median : 3914 | Median : 49.0 | Median : 20.917 | Median : 24.983 | Median :11.111 | Median : 5.060e+08 | Median : 1.664e+08 | Median :6.000 | Median : 0.000e+00 | Median : 0.000e+00 | Median :7.838e+06 | Median : 3876199 | Median :49.55 | Median :50.45 | Median : 3984469 | Median : 1.6179 | Median : 55.3736 | Median : 4.6870 | Median :60.62 | Median :34.07 | Median : 24.489 | Median :100.00 | Median : 0.4832 | Median : 41.96430 | Median : 99.90 | Median : 6999 | Median : -1.934 | Median : 4770 | Median : 6.397 | Median :-1.470e+08 | Median :-5.586e+08 | Median :-1.570e+08 | Median : 6.594e+10 | Median : 0.000057 | Median : 31.10 | Median :17.2 | Median : 1.136 | Median : 9410.0 | Median : 1300 | Median : 72.7388 | Median :68.79 | Median : 199810 | Median : 3568471 | Median :4.220e+08 | Median : 3.1633 | Median : 0.1259 | Median :5.679e+09 | Median : 34.86 | Median : 20.56 | Median :103.06 | Median :1.620e+10 | Median : 3.494e+09 | Median : 20.09 | Median : 2.869e+09 | Median :3.482e+09 | Median :4.856e+09 | Median : 1917.2 | Median : 1.9469 | Median : 3.525 | Median :1.419e+10 | Median : 3.0458 | Median :12.34879 | Median : 16.15486 | Median :13.1699 | Median :4.605e+09 | Median :20.82 | Median :48.13 | Median :26.61 | Median : 2.800 | Median :62.11 | Median : 6.400 | Median : 131.969 | Median : 5.503 | Median : 5.405 | Median : 554 | Median : 6703.3 | Median : 64.271 | Median : 579.4 | Median : 3.089 | Median : 2.26642 | Median : 13800 | Median :0.28513 | Median :0.20237 | Median :0.38302 | Median :24.45 | Median :1026.0 | Median : 18.822 | Median : 96.9309 | |
| NA | NA | Mean : 2.721 | Mean : 51.925 | Mean : 17795487 | Mean : 28.64 | Mean :42.3491 | Mean : 20.201 | Mean : 75.522 | Mean : 7.0120 | Mean : 238871 | Mean : 6.517 | Mean :206.1 | Mean : 645.8 | Mean :118.5 | Mean : 31.61 | Mean :71.468 | Mean :61.648 | Mean :11.48 | Mean : 5.527 | Mean : 17.72 | Mean : 4.808 | Mean :1.355e+10 | Mean :1.364e+10 | Mean :43.26 | Mean :44.39 | Mean :45.70 | Mean : 42934 | Mean : 1093.22 | Mean : 156892 | Mean : 2706222 | Mean : 9687.4 | Mean : -0.08267 | Mean :48.08 | Mean : 20129583 | Mean :36.429 | Mean : 22145 | Mean : 290.7 | Mean : 35.380 | Mean : 27.943 | Mean :13.631 | Mean : 1.251e+10 | Mean : 1.204e+10 | Mean :6.235 | Mean :-2.166e+09 | Mean : 2.274e+09 | Mean :3.793e+07 | Mean : 19093379 | Mean :49.70 | Mean :50.30 | Mean : 18831691 | Mean : 1.6401 | Mean : 110.1329 | Mean : 7.1296 | Mean :59.94 | Mean :32.93 | Mean : 29.004 | Mean : 93.35 | Mean : 25.3769 | Mean : 47.63829 | Mean : 77.59 | Mean : 70685 | Mean : 3.346 | Mean : 17539 | Mean : 9.081 | Mean :-3.211e+08 | Mean :-1.635e+12 | Mean :-4.732e+08 | Mean : 3.815e+13 | Mean : 0.280302 | Mean : 45.74 | Mean :18.8 | Mean : 9.857 | Mean : 44625.0 | Mean : 14990 | Mean : 67.4559 | Mean :65.80 | Mean : 846340 | Mean : 18061107 | Mean :4.431e+09 | Mean : 6.9947 | Mean :14.3044 | Mean :5.838e+10 | Mean : 40.42 | Mean : 20.40 | Mean :104.99 | Mean :2.452e+11 | Mean : 6.647e+10 | Mean : 18.98 | Mean : 6.225e+10 | Mean :4.836e+10 | Mean :4.726e+10 | Mean : 7365.6 | Mean : 1.3644 | Mean : 3.076 | Mean :2.451e+11 | Mean : 14.7042 | Mean :14.25896 | Mean : 25.06494 | Mean :14.4835 | Mean :5.899e+10 | Mean :20.44 | Mean :47.70 | Mean :31.86 | Mean : 3.121 | Mean :61.98 | Mean : 6.713 | Mean : 670.953 | Mean : 5.877 | Mean : 17.592 | Mean : 67918 | Mean : 61102.1 | Mean : 63.522 | Mean : 30666.6 | Mean : 12.916 | Mean : 4.52634 | Mean : 165030 | Mean :0.40723 | Mean :0.27200 | Mean :0.61531 | Mean :26.67 | Mean :1147.3 | Mean : 34.736 | Mean : 72.9918 | |
| NA | NA | 3rd Qu.: 4.054 | 3rd Qu.: 70.314 | 3rd Qu.: 12296820 | 3rd Qu.: 39.86 | 3rd Qu.:55.3577 | 3rd Qu.: 22.394 | 3rd Qu.: 95.561 | 3rd Qu.: 8.9319 | 3rd Qu.: 134735 | 3rd Qu.:10.100 | 3rd Qu.:252.0 | 3rd Qu.: 785.0 | 3rd Qu.:148.0 | 3rd Qu.: 43.00 | 3rd Qu.:84.780 | 3rd Qu.:74.000 | 3rd Qu.:14.50 | 3rd Qu.: 7.600 | 3rd Qu.: 21.70 | 3rd Qu.: 6.000 | 3rd Qu.:6.453e+09 | 3rd Qu.:6.239e+09 | 3rd Qu.:66.22 | 3rd Qu.:69.81 | 3rd Qu.:79.33 | 3rd Qu.: 1717 | 3rd Qu.: 187.86 | 3rd Qu.: 106576 | 3rd Qu.: 1827545 | 3rd Qu.: 4386.0 | 3rd Qu.: 1.81238 | 3rd Qu.:66.13 | 3rd Qu.: 10892256 | 3rd Qu.:65.414 | 3rd Qu.: 20576 | 3rd Qu.: 182.5 | 3rd Qu.: 63.263 | 3rd Qu.: 35.319 | 3rd Qu.:19.149 | 3rd Qu.: 4.571e+09 | 3rd Qu.: 1.857e+09 | 3rd Qu.:7.000 | 3rd Qu.: 1.340e+07 | 3rd Qu.: 9.186e+06 | 3rd Qu.:2.580e+07 | 3rd Qu.: 12820131 | 3rd Qu.:50.18 | 3rd Qu.:51.17 | 3rd Qu.: 12878546 | 3rd Qu.: 2.6058 | 3rd Qu.: 112.8378 | 3rd Qu.:10.7652 | 3rd Qu.:65.99 | 3rd Qu.:43.30 | 3rd Qu.: 35.835 | 3rd Qu.:100.00 | 3rd Qu.: 46.7282 | 3rd Qu.: 99.39609 | 3rd Qu.:100.00 | 3rd Qu.: 41540 | 3rd Qu.: 8.927 | 3rd Qu.: 14654 | 3rd Qu.: 10.342 | 3rd Qu.:-2.819e+06 | 3rd Qu.:-4.750e+03 | 3rd Qu.:-1.079e+07 | 3rd Qu.: 7.449e+11 | 3rd Qu.: 0.080898 | 3rd Qu.: 69.50 | 3rd Qu.:25.8 | 3rd Qu.: 13.678 | 3rd Qu.: 32265.0 | 3rd Qu.: 6460 | 3rd Qu.: 84.2482 | 3rd Qu.:74.00 | 3rd Qu.: 653540 | 3rd Qu.: 11985666 | 3rd Qu.:3.319e+09 | 3rd Qu.: 8.9348 | 3rd Qu.:15.2777 | 3rd Qu.:2.723e+10 | 3rd Qu.: 50.60 | 3rd Qu.: 27.38 | 3rd Qu.:110.03 | 3rd Qu.:9.050e+10 | 3rd Qu.: 2.759e+10 | 3rd Qu.: 28.12 | 3rd Qu.: 2.382e+10 | 3rd Qu.:2.370e+10 | 3rd Qu.:2.275e+10 | 3rd Qu.: 6956.9 | 3rd Qu.: 4.2256 | 3rd Qu.: 5.845 | 3rd Qu.:9.269e+10 | 3rd Qu.: 13.3106 | 3rd Qu.:19.05664 | 3rd Qu.: 35.51997 | 3rd Qu.:18.3424 | 3rd Qu.:2.731e+10 | 3rd Qu.:26.79 | 3rd Qu.:62.27 | 3rd Qu.:48.36 | 3rd Qu.: 4.450 | 3rd Qu.:72.43 | 3rd Qu.: 8.300 | 3rd Qu.: 602.116 | 3rd Qu.: 7.470 | 3rd Qu.: 27.662 | 3rd Qu.: 12526 | 3rd Qu.: 33444.9 | 3rd Qu.: 89.747 | 3rd Qu.: 12884.0 | 3rd Qu.: 21.451 | 3rd Qu.: 6.71555 | 3rd Qu.: 74630 | 3rd Qu.:0.46803 | 3rd Qu.:0.32905 | 3rd Qu.:0.67852 | 3rd Qu.:38.20 | 3rd Qu.:1622.0 | 3rd Qu.: 50.471 | 3rd Qu.:100.0000 | |
| NA | NA | Max. : 48.936 | Max. :100.000 | Max. :861289359 | Max. : 100.00 | Max. :98.4674 | Max. :304.276 | Max. :408.362 | Max. :87.5075 | Max. :12355240 | Max. :19.950 | Max. :714.0 | Max. :1785.0 | Max. :602.0 | Max. :147.00 | Max. :95.174 | Max. :92.978 | Max. :92.60 | Max. :39.500 | Max. :147.80 | Max. :12.000 | Max. :5.884e+11 | Max. :8.758e+11 | Max. :92.78 | Max. :95.10 | Max. :99.38 | Max. :46678110 | Max. :277330.58 | Max. :6550540 | Max. :132161360 | Max. :528263.2 | Max. : 29.62834 | Max. :97.16 | Max. :898024053 | Max. :98.343 | Max. :333819 | Max. :5661.0 | Max. :100.000 | Max. :100.236 | Max. :63.750 | Max. : 8.993e+11 | Max. : 1.136e+12 | Max. :8.000 | Max. : 2.827e+11 | Max. : 7.257e+11 | Max. :1.402e+09 | Max. :719160951 | Max. :76.71 | Max. :55.63 | Max. :682951049 | Max. :17.5122 | Max. :1801.8067 | Max. :28.3973 | Max. :86.40 | Max. :50.76 | Max. :100.784 | Max. :100.00 | Max. :100.0000 | Max. :100.00000 | Max. :100.00 | Max. :4471008 | Max. :586.284 | Max. :546990 | Max. :192.227 | Max. : 2.923e+11 | Max. : 1.051e+14 | Max. : 2.578e+11 | Max. : 1.021e+16 | Max. :22.413461 | Max. :190.10 | Max. :64.6 | Max. :1046.901 | Max. :1242150.0 | Max. :746030 | Max. :100.0000 | Max. :84.36 | Max. :16389950 | Max. :787183156 | Max. :1.824e+11 | Max. :82.4923 | Max. :99.6529 | Max. :3.138e+12 | Max. :236.39 | Max. : 87.10 | Max. :261.43 | Max. :2.204e+13 | Max. : 6.257e+12 | Max. : 88.39 | Max. : 6.281e+12 | Max. :2.497e+12 | Max. :2.557e+12 | Max. :118823.6 | Max. : 92.2018 | Max. : 88.958 | Max. :2.143e+13 | Max. :359.2561 | Max. :94.05709 | Max. :136.10844 | Max. :62.4160 | Max. :2.723e+12 | Max. :59.58 | Max. :88.51 | Max. :92.37 | Max. :17.880 | Max. :87.17 | Max. :22.000 | Max. :10623.850 | Max. :20.413 | Max. :216.648 | Max. :7499587 | Max. :2494601.4 | Max. :258.524 | Max. :1498556.2 | Max. :167.418 | Max. :87.68964 | Max. :10313460 | Max. :2.52446 | Max. :1.96865 | Max. :5.35097 | Max. :56.95 | Max. :3240.0 | Max. :288.585 | Max. :100.0000 |
WDI %>% select(-(1:2)) %>% correlate()
##
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
ggcorrplot(cor(WDI %>% select(-(1:2))), tl.cex = 5, hc.order = TRUE)
# 1
gold_prices <- read.csv("Data_pack/Gold prices.csv", sep=',')
gold_prices <- gold_prices %>%
rename(Price = `USD..AM.`) %>%
mutate(Year = substr(Date, 1, 4)) %>%
select(-1) %>%
group_by(Year) %>%
summarise(across(1, ~ mean(., na.rm = TRUE))) %>%
slice(-c(1,2,54))
# 2
Gold_prediction <- WDI %>% select(-1) %>% group_by(Year) %>% summarise(across(-1, ~ mean(.)))
# 3
Gold_prediction <- Gold_prediction %>% inner_join(gold_prices, by='Year')
p <- Gold_prediction %>%
ggplot( aes(Year, Price)) +
geom_point() +
theme_bw()
ggplotly(p)
trainIndx <- createDataPartition(Gold_prediction$Price,
p = 4/5,
list = FALSE)
#Do not use Year for the model
Gold_prediction_filtered <- Gold_prediction %>% select(-Year)
training <- Gold_prediction_filtered[ trainIndx, ]
testing <- Gold_prediction_filtered[-trainIndx, ]
fitControl <- trainControl(
method = "repeatedcv",
number = 5,
repeats = 5,
verboseIter = FALSE,
returnResamp = "all")
set.seed(12)
rrfFit <- train(Price ~ .,
data = training,
method = 'ranger',
tuneLength = 10,
trControl = fitControl,
num.trees = 700,
importance = "permutation")
trellis.par.set(caretTheme())
plot(rrfFit)
testPred <- predict(rrfFit , testing)
RMSE uzyskany przez model to 170.8670375
10 najważniejszych atrybutów wraz z opisami:
WDI_metadata <- read_excel("Data_pack/World_Development_Indicators.xlsx", sheet = 2, na="..")
imp <- varImp(rrfFit)$importance %>% arrange(-Overall) %>% slice_head(n = 10)
imp
WDI_metadata %>% select(c("Indicator Name", "Long definition")) %>% filter(WDI_metadata$Code %in% rownames(imp))